## Replication Code
## Accountability for the Local Economy at All Levels of Government in United States Elections
## Justin de Benedictis-Kessner Christopher Warshaw
## Run this code 1st


##----------------##
### Preliminary ####
##----------------##

library(foreign)
library(dplyr)
library(tidyr)
library(readr)
library(stringr)

##--------------##
### Read data ####
##--------------##

## Population
unemployment <- read.dta(file="Economy/unemployment.dta")	

qcew_income <- read.dta(file="Economy/qcew_income.dta")	

county_income_bea <- read.csv(file="Economy/CAINC4__ALL_AREAS_1969_2018.csv")
county_income_bea <- select(county_income_bea, -Unit)

county_income_bea_pop <- subset(county_income_bea,Description=="Population (persons) 3/")
dim(county_income_bea_pop)
county_income_bea_employment <- subset(county_income_bea,Description==" Wage and salary employment")
dim(county_income_bea_employment)
county_income_bea_wages <- subset(county_income_bea,Description=="Wages and salaries")
dim(county_income_bea_wages)
county_income_bea <- subset(county_income_bea,Description=="Equals: Net earnings by place of residence")
dim(county_income_bea)

county_income_bea_wages <- gather(county_income_bea_wages, key=year, value=wages, X1969:X2018)
county_income_bea_wages <- select(county_income_bea_wages,GeoFIPS,year,wages)
colnames(county_income_bea_wages)[1] <- "fips"
county_income_bea_wages$year <- gsub("X", "", county_income_bea_wages$year)
county_income_bea_wages$fips <- as.numeric(as.vector(county_income_bea_wages$fips))
county_income_bea_wages <- filter(county_income_bea_wages,fips!=0)

county_income_bea <- gather(county_income_bea, key=year, value=earnings, X1969:X2018)
county_income_bea <- select(county_income_bea, GeoFIPS,year,earnings)
colnames(county_income_bea)[1] <- "fips"
county_income_bea$year <- gsub("X", "", county_income_bea$year)
county_income_bea$fips <- as.numeric(as.vector(county_income_bea$fips))
county_income_bea <- filter(county_income_bea,fips!=0)

county_income_bea_pop <- gather(county_income_bea_pop, key=year, value=population, X1969:X2018)
county_income_bea_pop <- select(county_income_bea_pop,GeoFIPS,year,population)
colnames(county_income_bea_pop)[1] <- "fips"
county_income_bea_pop$year <- gsub("X", "", county_income_bea_pop$year)
county_income_bea_pop$fips <- as.numeric(as.vector(county_income_bea_pop$fips))
county_income_bea_pop <- filter(county_income_bea_pop,fips!=0)

county_income_bea_employment <- gather(county_income_bea_employment, key=year, value=employment, X1969:X2018)
county_income_bea_employment <- select(county_income_bea_employment,GeoFIPS,year,employment)
colnames(county_income_bea_employment)[1] <- "fips"
county_income_bea_employment$year <- gsub("X", "", county_income_bea_employment$year)
county_income_bea_employment$fips <- as.numeric(as.vector(county_income_bea_employment$fips))
county_income_bea_employment <- filter(county_income_bea_employment,fips!=0)

county_income_bea2 <- merge(county_income_bea, county_income_bea_pop, by=c("fips", "year"))
county_income_bea2 <- merge(county_income_bea2, county_income_bea_employment, by=c("fips", "year"))
county_income_bea2 <- merge(county_income_bea2, county_income_bea_wages, by=c("fips", "year"))

county_income_bea2$population <- as.numeric(as.vector(county_income_bea2$population))
county_income_bea2$employment <- as.numeric(as.vector(county_income_bea2$employment))
county_income_bea2$earnings <- as.numeric(as.vector(county_income_bea2$earnings))
county_income_bea2$wages <- as.numeric(as.vector(county_income_bea2$wages))

county_income_bea2$earnings_perworker <- county_income_bea2$earnings/county_income_bea2$employment
county_income_bea2$wages_perworker <- county_income_bea2$wages/county_income_bea2$employment

county_income_bea2$earnings_pc <- county_income_bea2$earnings/county_income_bea2$population
county_income_bea2$employment_pc <- county_income_bea2$employment/county_income_bea2$population

tax_data<-0
if(tax_data==1){
### tax income
	county_income <- read.dta(file="Economy/county_income.dta")

	county_income10 <- read_csv(file="Economy/county_income10.csv")
	county_income10 <- county_income10[,(1:8)]
	county_income10$year <- 2010
	county_income10 <- rename(county_income10, county=county_name)
	county_income11 <- read_csv(file="Economy/county_income11.csv")
	county_income11 <- county_income11[,(1:8)]
	county_income11$year <- 2011
	county_income11 <- rename(county_income11, county=county_name)
	county_income12 <- read_csv(file="Economy/county_income12.csv")
	county_income12 <- county_income12[,(1:8)]
	county_income12$year <- 2012
	county_income12 <- rename(county_income12, county=county_name)
	county_income13 <- read_csv(file="Economy/county_income13.csv")
	county_income13 <- county_income13[,(1:8)]
	county_income13$year <- 2013
	county_income13 <- rename(county_income13, county=county_name)
	county_income14 <- read_csv(file="Economy/county_income14.csv")
	county_income14 <- county_income14[,(1:8)]
	county_income14$year <- 2014
	county_income14 <- rename(county_income14, county=county_name)
	county_income15 <- read_csv(file="Economy/county_income15.csv")
	county_income15 <- county_income15[,(1:8)]
	county_income15$year <- 2015
	county_income15 <- rename(county_income15, county=county_name)
	county_income16 <- read_csv(file="Economy/county_income16.csv")
	county_income16 <- county_income16[,(1:8)]
	county_income16$year <- 2016
	county_income16 <- rename(county_income16, county=county_name)
	county_income2 <- merge(county_income10, county_income11, all.x=T, all.y=T)
	county_income2 <- merge(county_income2, county_income12, all.x=T, all.y=T)
	county_income2 <- merge(county_income2, county_income13, all.x=T, all.y=T)
	county_income2 <- merge(county_income2, county_income14, all.x=T, all.y=T)
	county_income2 <- merge(county_income2, county_income15, all.x=T, all.y=T)
	county_income2 <- merge(county_income2, county_income16, all.x=T, all.y=T)

	county_income2$wages <- as.numeric(as.vector(county_income2$wages))
	county_income$wages <- as.numeric(as.vector(county_income$wages))

	county_income2$exemptions <- as.numeric(as.vector(county_income2$exemptions))
	county_income$exemptions <- as.numeric(as.vector(county_income$exemptions))

	county_income2$agi <- as.numeric(as.vector(county_income2$agi))
	county_income$agi <- as.numeric(as.vector(county_income$agi))

	county_income2 <- dplyr::rename(county_income2, wages_irs=wages)

	county_income <- dplyr::rename(county_income, wages_irs=wages)

	county_income$state_fips <- as.numeric(as.vector(county_income$state_fips))
	county_income <- merge(county_income, county_income2, all.x=T, all.y=T)

	county_income <- subset(county_income, !is.na(county_fips))
	county_income$fips <- 0
	county_income$fips[nchar(county_income$county_fips)==3] <- paste(county_income$state_fips[nchar(county_income$county_fips)==3], county_income$county_fips[nchar(county_income$county_fips)==3], sep="")

	county_income$fips[nchar(county_income$county_fips)==2] <- paste(county_income$state_fips[nchar(county_income$county_fips)==2], "0",county_income$county_fips[nchar(county_income$county_fips)==2], sep="")

	county_income$fips[nchar(county_income$county_fips)==1] <- paste(county_income$state_fips[nchar(county_income$county_fips)==1], "00",county_income$county_fips[nchar(county_income$county_fips)==1], sep="")

	county_income$fips <- as.numeric(as.vector(county_income$fips))
	county_income <- filter(county_income,!is.na(fips) & fips!=0)
	table(county_income$year)
	table(county_income$state_fips)
	county_income$state_fips_numeric <- as.numeric(as.vector(county_income$state_fips))
	county_income <- merge(county_income, s, by.x=c("state_fips_numeric"), by.y=c("STATEFIP"), all.x=T)

	county_income$county <- as.vector(county_income$county)
	county_income$county  <-  iconv(county_income$county,"WINDOWS-1252","UTF-8")

	county_income$county <- tolower(county_income$county)
	county_income$county <- gsub(" county", "", county_income$county)
	county_income$state <- gsub("Countyincome/", "", county_income$state)
	county_income$state <- gsub("Countyincomew/", "", county_income$state)
	county_income$state <- gsub("ci.Xls", "", county_income$state)
	county_income$state <- gsub("cir.Xls", "", county_income$state)
	county_income$state <- gsub("Northcarolina", "North Carolina", county_income$state)
	county_income$state <- gsub("Northdakota", "North Dakota", county_income$state)
	county_income$state <- gsub("Southcarolina", "South Carolina", county_income$state)
	county_income$state <- gsub("Westvirginia", "West Virginia", county_income$state)
	county_income$state <- gsub("Newhampshire", "New Hampshire", county_income$state)
	county_income$state <- gsub("Newjersey", "New Jersey", county_income$state)
	county_income$state <- gsub("Newmexico", "New Mexico", county_income$state)
	county_income$state <- gsub("Newyork", "New York", county_income$state)
	table(county_income$state)
	county_income <- dplyr::select(county_income, -state_fips_numeric, -state_fips, -state_abb)
}

s <- read.csv(paste("StateCodes.csv",sep=""))
s <- s[,c(8,9)]
s$Name <- as.vector(s$Name)

table(county_income_bea2$year)


	county_income2 <- county_income_bea2
	cpi <- read.csv("Economy/cpi.csv", na="",
                    stringsAsFactors=FALSE)
	s <- read.csv(paste("Economy/states_icpsr.csv",sep=""))
	s <- s[,c(1,4)]

if(tax_data==1){
	county_income2 <- merge(county_income2, county_income, by.x=c("fips", "year"), by.y=c("fips", "year"), all.x=T, all.y=T)
}
	county_income2 <- merge(county_income2, qcew_income, by.x=c("fips", "year"), by.y=c("fips", "year"), all.x=T, all.y=T)
	county_income2 <- merge(county_income2, unemployment, by.x=c("fips", "year"), by.y=c("county_fips2", "year"), all.x=T)

	county_income2 <- dplyr::select(county_income2, -county.y)
	county_income2 <- dplyr::rename(county_income2, county=county.x)
	county_income2 <- dplyr::select(county_income2, -county_fips.y)
	county_income2 <- dplyr::rename(county_income2, county_fips=county_fips.x)
	county_income2 <- merge(county_income2, cpi, by.x=c( "year"), by.y=c( "year"), all.x=T)

if(tax_data==1){
	county_income2$agi <- as.numeric(as.vector(county_income2$agi))
	county_income2$returns <- as.numeric(as.vector(county_income2$returns))
	county_income2$exemptions <- as.numeric(as.vector(county_income2$exemptions))
}

qcew_income <- subset(qcew_income, !is.na(fips))

county_income2$cpi2015_multiplier <- as.numeric(as.vector(county_income2$cpi2015_multiplier))
if(tax_data==1){
	county_income2$agi_real <- as.numeric(as.vector(county_income2$agi))*(county_income2$cpi2015_multiplier)
	county_income2$agi_real_pc <- county_income2$agi_real/county_income2$exemptions*1000
	county_income2$wages_irs_real <- as.numeric(as.vector(county_income2$wages_irs))*(county_income2$cpi2015_multiplier)
	county_income2$wages_irs_real_pc <- county_income2$wages_irs_real/county_income2$exemptions*1000
}

table(county_income2$year)

if(tax_data==1){
	agi <- dplyr::select(county_income2,fips,year, agi_real_pc, wages_irs_real_pc , exemptions)
	agi <- subset(agi, !is.na(agi_real_pc))
	write.dta(agi, file="Economy/agi.dta")
}
county_income2$earnings_perworker <- as.numeric(as.vector(county_income2$earnings_perworker))
county_income2$earnings_pc <- as.numeric(as.vector(county_income2$earnings_pc))
county_income2$wages_perworker <- as.numeric(as.vector(county_income2$wages_perworker))

county_income2$earnings_perworker_real <- as.numeric(as.vector(county_income2$earnings_perworker))/(county_income2$cpi2015_multiplier/100)

county_income2$earnings_pc_real <- as.numeric(as.vector(county_income2$earnings_pc))*(county_income2$cpi2015_multiplier*1000)
county_income2$wages_perworker_real <- as.numeric(as.vector(county_income2$wages_perworker))*(county_income2$cpi2015_multiplier*1000)
county_income2$unemployment_rate <- as.numeric(as.vector(county_income2$unemployment_rate))
county_income2$qcew_pay_perworker_real <- as.numeric(as.vector(county_income2$QCEW_annual_pay))*(county_income2$cpi2015_multiplier)

## To check sample-based measures of income vs. BEA measures:
# agi_real_pc - tax based gross-adjusted income
# unemployment_rate - ACS based unemployment_rate
# wages_perworker_real - BEA based real income
# cor(county_income2$wages_perworker_real,county_income2$wages_perworker_real, use="complete.obs")
# cor(county_income2$wages_perworker_real,county_income2$qcew_pay_perworker_real, use="complete.obs")

####
## Validation and relationships between measures
####
validation <- 0
if (validation==1){
	summary(lm(scale(unemployment_rate)~scale(agi_real_pc)+factor(year),data=county_income2[county_income2$year>1989,]))
	summary(lm(scale(unemployment_rate)~scale(earnings_pc_real)+factor(year),data=county_income2[county_income2$year>1989,]))
	summary(lm(scale(unemployment_rate)~scale(earnings_perworker_real)+factor(year),data=county_income2[county_income2$year>1989,]))
	summary(lm(scale(unemployment_rate)~scale(qcew_pay_perworker_real)+factor(year),data=county_income2[county_income2$year>1989,]))
	
	summary(lm(scale(unemployment_rate)~scale(qcew_pay_perworker_real)+factor(year),data=county_income2[county_income2$year>1989,]))
	summary(lm(scale(earnings_perworker_real)~scale(qcew_pay_perworker_real)+factor(year),data=county_income2[county_income2$year>1989,]))
	#summary(lm(scale(earnings_perworker_real)~scale(earnings_perworker_real)+factor(year),data=county_income2[county_income2$year>1989,]))
	summary(lm(scale(agi_real_pc)~scale(qcew_pay_perworker_real)+factor(year),data=county_income2[county_income2$year>1989,]))
	### ahah! Lenz and Healy data are same as raw QCEW data
	summary(lm(scale(annual_wage)~scale(QCEW_annual_pay)+factor(year),data=county_income2[county_income2$year>1989,]))
	
	summary(lm(scale(unemployment_rate)~scale(earnings_perworker_real),data=county_income2[county_income2$year==2006,]))
	summary(lm(scale(earnings_perworker_real)~scale(qcew_pay_perworker_real),data=county_income2[county_income2$year==2006,]))
	summary(lm(scale(agi_real_pc)~scale(earnings_perworker_real),data=county_income2[county_income2$year==2006,]))
	
	summary(lm(scale(unemployment_rate)~scale(qcew_pay_perworker_real),data=county_income2[county_income2$year==2006,]))
	summary(lm(scale(earnings_perworker_real)~scale(qcew_pay_perworker_real),data=county_income2[county_income2$year==2006,]))
	summary(lm(scale(agi_real_pc)~scale(qcew_pay_perworker_real),data=county_income2[county_income2$year==2006,]))
	#summary(lm(scale(earnings_perworker_real)~scale(earnings_perworker_real),data=county_income2[county_income2$year==2006,]))
	
}

county_income2$pres_dem_incumbent <- NA
county_income2$pres_dem_incumbent[(county_income2$year>1968 & county_income2$year<1977) | (county_income2$year>1980 &county_income2$year<1993) | (county_income2$year>2000 &county_income2$year<2009)| (county_income2$year>2016 &county_income2$year<2020)] <-  -1


county_income2$pres_dem_incumbent[(county_income2$year>1959 &county_income2$year<1969) |(county_income2$year>1976 &county_income2$year<1981) | (county_income2$year>1992 &county_income2$year<2001) | (county_income2$year>2008 &county_income2$year<2017)] <-  1

county_income2$pres_dem_incumbent2 <- county_income2$pres_dem_incumbent
county_income2$pres_dem_incumbent2[county_income2$pres_dem_incumbent==-1] <-  0

county_income2   <-  arrange(county_income2,fips, year) 

county_income2$year <- as.numeric(as.vector(county_income2$year))
county_income2   <-  arrange(county_income2,fips, year) 


county_income2$qcew_pay_perworker_real2 <- county_income2$qcew_pay_perworker_real/max(county_income2$qcew_pay_perworker_real, na.rm=T)
county_income2$earnings_perworker_real2 <- county_income2$earnings_perworker_real/max(county_income2$earnings_perworker_real, na.rm=T)
if(tax_data==1){
	county_income2$agi_real_pc2 <- county_income2$agi_real_pc/max(county_income2$agi_real_pc, na.rm=T)
}
county_income2$unemployment_rate2 <- county_income2$unemployment_rate/max(county_income2$unemployment_rate, na.rm=T)

dim(county_income2) # 162726 
county_income2$duplicate <- duplicated(paste(county_income2$fips, county_income2$year))
county_income2 <- subset(county_income2, duplicate==F)
dim(county_income2) # 162401 - removed 325

county_income2$population <- as.numeric(as.vector(county_income2$population))

county_income2[county_income2==""] <- NA

county_income2$period  <-  NA

county_income2$period[county_income2$year<1993]  <-  0
county_income2$period[county_income2$year>1992]  <-  1

## ---------------- ##
#### Output files ####
## ---------------- ##
# write.dta(county_income2, file="county_income_merged.dta")
econ <- county_income2
save(econ, file="county_income.RData")

